################################
#	John Henderson
#	Gerrymandering Incumbency 
#		(with Brian Hamel and Aaron Goldzimer)
#		January 1, 2018
#
################################
# CD partisan bias plots
################################

rm(list=ls())
library(stringr)

setwd('~/Dropbox/StateRedistricting/replication/short')   

state=chamber=NULL
flips=NULL   

#####################################################################
######## SECTION 2: CREATE THE ELECTORAL BIAS FILE
#####################################################################

library(foreign)

repMaj=function(temp,pred=T,linear,logit,model='logit',partisan=T){
	if(pred==F){
		tmp=mean(temp>0)
	} else if(pred==T){
		if(model=='logit'){
			if(partisan==T){
				tmp=(temp+1)/2		  	
			} else{
				tmp=abs(temp)
			}
			tmp=tmp*logit$coef[2]+logit$coef[1] 			
			tmp=1/(1+exp(-tmp))  
		} else if(model=='linear'){ 
			if(partisan==T){
				tmp=(temp+1)/2		  	
			} else{
				tmp=abs(temp)
			} 	  	
			tmp=tmp*linear$coef[2]+linear$coef[1] 			
			#tmp=1/(1+exp(-tmp))			
		}   		
	}    
	return(tmp)	
} 
                  
######################
# flips on since recovers the partisan margins 
###################### 

load('stateSwings.Rdata') 

flipPr=function(temp,state,year_mins,year_maxs,seats=T){
 	# assumes that temp is built from rshare-(1-rshare)
	rshare=(temp+1)/2    
	dshare=1-(temp+1)/2  

	#imagine some feasible counterfactual
	i.state=which(rownames(year_mins)==state)    
	minR=year_mins[i.state,1]/100
	maxR=year_maxs[i.state,1]/100
	minD=year_mins[i.state,2]/100
	maxD=year_maxs[i.state,2]/100  

	updat=as.numeric((as.numeric(sign((dshare+abs(maxD-mean(dshare)))-(rshare-abs(minR-mean(rshare))))!=sign(dshare-rshare))+
	as.numeric(sign((rshare+abs(maxR-mean(rshare)))-(dshare-abs(minD-mean(dshare))))!=sign(rshare-dshare)))>0)

	if(seats==T){
		return(mean(updat))
	} else{
		dshare+abs(maxD-mean(dshare))
		rshare-abs(minR-mean(rshare))		

		rshare+abs(maxR-mean(rshare)) 	
		dshare-abs(minD-mean(dshare))
	}

}    
source('analysisSTFun.R')      

######################
# cd - predicted probability at CD 
######################  

#model seat control at CD by pres vote share w/n state and 2012/2014 year
load('voteData.Rdata')
# voteData
voteData$Rseat=as.numeric(voteData[,1]<voteData[,2])
voteData12=voteData[which(voteData$"YearElec"==2012),] 
voteData12$Sflip=as.numeric(voteData$Rseat[which(voteData$"YearElec"==2012)]!=voteData$Rseat[which(voteData$"YearElec"==2014)])

rm(voteData) 
voteData12=voteData12[order(voteData12$StPOAbrv,voteData12$RPresShr,decreasing=F),]

y=voteData12$Rseat
z=voteData12$Sflip

voteData12$pShare=array(NA,length(voteData12$"StPOAbrv"))     
un_st=unique(voteData12$"StPOAbrv") 
k_get=NULL  
flips=T  

un_st=c('AK','AZ','CA','CO','FL','ID','MT','NC','NM','NV','OH','SC','TX','VA','WA')  
for(i in 1:length(un_st)){
	state=un_st[i]  
	
	chamber='CD'

	#source('~/Dropbox/StateRedistricting/Code/plans2010/analysisST.R')
	if(state!='AK' & state!='MT' & state!='OH'){
		st=analysisST(state=state,chamber=chamber,flips=flips,m0=m0,m1=1000000)	
		k_get=st$k_get
		adopted=st$adopted
	}
	
	if(!is.null(k_get)){
		temp=(k_get[adopted[1],]+1)/2
		ik=which(voteData12$"StPOAbrv"==state)
		voteData12$pShare[ik]=temp
		##(sort(k_get[adopted[1],])+1)/2
	}
	k_get=NULL
}
voteData12$difpShare=abs((voteData12$pShare)-(1-voteData12$pShar))

#linear=lm(voteData12$Rseat~voteData12$pShare+as.factor(voteData12$StPOAbrv))    
#logit=glm(voteData12$Rseat~voteData12$pShare+as.factor(voteData12$StPOAbrv),family=binomial(link=logit))

logit=linear=list()    

linear[[1]]=lm(voteData12$Rseat~voteData12$pShare)    
logit[[1]]=glm(voteData12$Rseat~voteData12$pShare,family=binomial(link=logit))
                                                                               
linear[[2]]=lm(voteData12$Sflip~voteData12$difpShare)    
logit[[2]]=glm(voteData12$Sflip~voteData12$difpShare,family=binomial(link=logit))

##############################################################################
########## CODE TO PRODUCE FIGURE 7 (LEFT AD RIGHT PLOTS)
##############################################################################
library(foreign) 
library(mice)  
library(matrixStats)

imputeNAplans=function(dists,plans=NULL){
	# imputing missing cells for partial plans
	# can't just align districts by number, since these have geographical 
	# definition; and don't have geographical boundaries for non-districts
	# in shapefiles, etc. 
	
	# hard and fast rule: sort districts on the pr(Obama 08) for whole plans, 
	# then match existing districts in partial plans to those that reduce the k-plan
	# distances
	if(is.null(plans)){
		warning('No index for plans is included: care is required to map back onto the various alternatives.')
	}   
	# length of datas
	m=length(dists)
	n=nrow(dists[[1]])  
	# length of completes
	cm=0
	#mu=array(NA,m)  
	is.cm=array(FALSE,m)  
	ord.dists=list() 
	ord.mat=matrix(NA,n,length(dists))
	for(i in 1:m){
		if(all(complete.cases(dists[[i]]))){
			cm=cm+1
			is.cm[i]=T
			mu=dists[[i]][,1]/(dists[[i]][,1]+dists[[i]][,2])
			ord.dists[[i]]=dists[[i]][order(mu),]
			
			ord.mat[,i]=t(t(sort(mu)))
		}
	}   
	
	for(i in 1:m){  
		if(is.cm[i]==F){
			m0=sort(dists[[i]][,1]/(dists[[i]][,1]+dists[[i]][,2]),na.last=T)
			n0=as.numeric(names(m0))
			n1=array(NA,n)
			for(p in 1:length(m0)){ 
				if(is.na(m0[p])){
					break()
				}
				dd=sqrt(rowSums((ord.mat-m0[p])^2,na.rm=T)) 
				if(length(which(!is.na(n1)))>0){
					dd[1:max(which(!is.na(n1)))]=NA
				}
				n1[which(dd==min(dd,na.rm=T))]=n0[p]
				
				#if dd object is approaching length(m0) *too fast*
				if(p<length(m0[!is.na(m0)]) & max(which(is.na(n1)))<length(m0)){
					n1=c(n1[-c(max(which(is.na(n1))))],NA)
				} 			      			
			}
			n1[which(is.na(n1))]=sample(n0[which(is.na(m0))],replace=F)	
			ord.dists[[i]]=dists[[i]][n1,]		   		
		}
	}
	 
	# quasi-randomization simulation : plans that have missing districts
	#  will have missings imputed for *all* the other plans that have those 
	#  districts : average district bias will be recorded ... 
	# complete plans are analyzed as is
	dist_insa=matrix(NA,n,1)
	dist_ins=matrix(NA,n,m) 
	colnames(dist_ins)=plans
	for(i in 1:m){  
		if(is.cm[i]==F){
			qn=ord.dists[[i]][,1]/(ord.dists[[i]][,1]+ord.dists[[i]][,2])-ord.dists[[i]][,2]/(ord.dists[[i]][,1]+ord.dists[[i]][,2])			
		  	mat_temp=matrix(qn,n,sum(is.cm))
			jj=0
			for(j in which(is.cm)){
				jj=jj+1
				qs=ord.dists[[j]][,1]/(ord.dists[[j]][,1]+ord.dists[[j]][,2])-ord.dists[[j]][,2]/(ord.dists[[j]][,1]+ord.dists[[j]][,2])							
				mat_temp[which(is.na(qn)),jj]=qs[which(is.na(qn))]				
			}                                                     
			colnames(mat_temp)=paste(plans[i],'sim',1:jj,sep='_')
			if(mean(complete.cases(dist_insa))==0){
				dist_insa=mat_temp
			} else {
				dist_insa=cbind(dist_insa,mat_temp)
			}
		} else {
			dist_ins[,i]=ord.dists[[i]][,1]/(ord.dists[[i]][,1]+ord.dists[[i]][,2])-ord.dists[[i]][,2]/(ord.dists[[i]][,1]+ord.dists[[i]][,2])			
		}
	} 
	if(mean(is.cm)<1){
		dist_ins=cbind(dist_ins[,which(is.cm==T)],dist_insa)
	}
	return(dist_ins) 
}
 
############################
#
# counterfactual maps 2010
#
############################
              
           
states=c(
  "AL","AK","AZ","AR","CA",  "CO","CT","DE","FL",  "GA","HI","ID","IL","IN",  "IA","KS","KY","LA","ME",  "MT","NE","NV","NH","NJ", "OR",  
  "NM","NY","NC","ND","OH",  "OK","MD","MA","MI",  "MN","MS","MO","PA","RI",  "SC","SD","TN","TX","UT",  "VT","VA","WA","WV","WI", "WY")

states=sort(states)
     
plans=read.csv('redist_authority.csv',stringsAsFactors=F)    
plans10=plans[which(plans[,1]==2010),]
included=array(FALSE,nrow(plans10))
for(i in 1:length(states)){
	included[which(states[i]==plans10[,2])]=TRUE
}

plans10=plans10[included,]
plans10=plans10[order(plans10[,2]),]

plansCD10 = array(NA,length(states))
plansCD10[which(plans10$congress_court==1)]='C'
plansCD10[which(plans10$congress_independent==1)]='I'
plansCD10[which(plans10$congress_politician==1)]='B'
plansCD10[which(is.na(plansCD10) & plans10$congress_legislatue_partycontrol=='nonpartisan')]='B' # NE
plansCD10[which(is.na(plansCD10) & plans10$congress_legislatue_partycontrol=='split')]='B'
plansCD10[which(is.na(plansCD10) & plans10$congress_legislatue_partycontrol=='D')]='D'
plansCD10[which(is.na(plansCD10) & plans10$congress_legislatue_partycontrol=='R')]='R'
plansCD10[which(is.na(plansCD10))]='N'

plansAD10 = array(NA,length(states))
plansAD10[which(plans10$state_court==1)]='C'
plansAD10[which(plans10$state_independent==1)]='I'
plansAD10[which(plans10$state_politician==1)]='B'
plansAD10[which(is.na(plansAD10) & plans10$state_legislature_partycontrol=='nonpartisan')]='B' # NE
plansAD10[which(is.na(plansAD10) & plans10$state_legislature_partycontrol=='split')]='B'
plansAD10[which(is.na(plansAD10) & plans10$state_legislature_partycontrol=='D')]='D'
plansAD10[which(is.na(plansAD10) & plans10$state_legislature_partycontrol=='R')]='R'
plansAD10[which(is.na(plansAD10))]='N'
 
plansSD10=plansAD10
                
states_cf=c('AK','AZ','CA','CO','FL','FL','ID','MT','NC','NC','NM','NV','OH','SC','TX','VA','WA')    
ix=c(which(states=='AK'),
	which(states=='AZ'),
	which(states=='CA'),
	which(states=='CO'),	
	which(states=='FL'),
	which(states=='FL'),	
	which(states=='ID'),
	which(states=='MT'),
	which(states=='NC'),
	which(states=='NC'),	 
	which(states=='NM'),
	which(states=='NV'),	 	
	which(states=='OH'),	
	which(states=='SC'),
	which(states=='TX'),
	which(states=='VA'),
	which(states=='WA')) 

plansCD10=plansCD10[ix] 
plansCD10[which(states_cf=='AK')]='I'
plansCD10[which(states_cf=='MT')]='I'
plansCD10[which(states_cf=='FL')[2]]='C'


#'CA'
#'CO'     
#plansCD10[ix]
#plansAD10[ix]

#################
# cd
#################

states_ix=states_cf[c(which(plansCD10=='D'),which(plansCD10=='R'),which(plansCD10=='B'),which(plansCD10=='C'),which(plansCD10=='I'))]  

e08=read.table('sims2010/cd.txt')
nms=e08[1,]
e08=as.data.frame(e08[-c(1),])
names(e08)=t(t(nms))

e08$mcnshare=as.numeric(as.character(e08[,10]))/(as.numeric(as.character(e08[,9]))+as.numeric(as.character(e08[,10])))                     
e08$dif=abs((1-e08[,11])-e08[,11])      

el08=repMaj(e08$mcnshare-(1-e08$mcnshare),model='logit',logit=logit[[2]],partisan=F)        

election08=tapply(e08$dif,e08[,1],mean)
el08=tapply(el08,e08[,1],mean)  

el08=el08[-c(which(names(el08)=='st'))]
election08=election08[-c(which(names(election08)=='st'))]

t.sims=sims=matrix(NA,length(states),211)
for(j in 1:length(states)){     

	fls=paste('sims2010/',states[j],'/plans/',length(which(e08[,1]==states[j])),'.txt',sep='')   
	if(file.exists(fls)){
		cds=read.table(fls)[,-c(1)]        
	} else if(!file.exists(fls)){
		fla=fls                        
		pp=0
		while(!file.exists(fla) & pp<60){
			pp=pp+1
			fla=paste('sims2010/',states[j],'/plans/',pp,'.txt',sep='')   					
		}
		fls=fla 
		if(file.exists(fls)){ 
			cds=read.table(fls)[,-c(1)]     
		} else{
			next(paste(states[j],'has no simulations',sep=' '))
		}
	}          

	t.sims[j,1]=sims[j,1]=states[j]   
		
	if(length(which(e08[,1]==states[j]))>1){ 
		rshare=t(apply(cds,1,sort))
		dshare=1-rshare
		
		sims[j,-c(1)][1:dim(cds)[1]]=rowMeans(abs((1-cds)-cds))	
		
		xsim=apply((rshare-dshare),2,repMaj,model='logit',logit=logit[[2]],partisan=F) 	
		t.sims[j,-c(1)][1:dim(xsim)[1]]=rowMeans(xsim)
		
	} else{
		rshare=cds	
		dshare=1-rshare
		
		sims[j,-c(1)][1:length(cds)]=(abs((1-cds)-cds)) 
		xsim=repMaj(rshare-dshare,model='logit',logit=logit[[2]],partisan=F) 		
		t.sims[j,-c(1)][1:length(xsim)]=xsim
	}   
	    	
}

sims=as.data.frame(sims[,1:201])  
t.sims=as.data.frame(t.sims[,1:201])  
for(j in 2:ncol(sims)){
	sims[,j]=as.numeric(as.character(sims[,j])) 
	t.sims[,j]=as.numeric(as.character(t.sims[,j])) 	
}


xx=table(e08[,1])
for(i in 1:length(xx)){
	if(xx[i]==1){
		sims[which(names(xx)[i]==sims[,1]),-c(1)]=0  
		election08[which(names(xx)[i]==names(election08))]=0   
		
		t.sims[which(names(xx)[i]==t.sims[,1]),-c(1)]=0  
		el08[which(names(xx)[i]==names(el08))]=0
	}
}

is.even <- function(x) x %% 2 == 0 
     
#############################  
## Florida has multiple plans and so need to adjust for the court map in 2015/2016
#############################
#folders=paste('CD_plans/',system('ls ~/Dropbox/StateRedistricting/Data/plans2010/FL/CD/CD_plans/',intern=T),sep='')
#full_plans=read.csv('~/Dropbox/StateRedistricting/Data/plans2010/FL/scraped/PlanDataIns.csv',header=F,stringsAsFactors=F)
#dates=array(NA,length(folders))
#folders=gsub(folders,pattern='CD_plans/',replace='') 
#for(i in 2:length(folders)){
#	ix=which(gsub(folders[i],pattern='_adopted',replace='')==full_plans[,1])
#	dates[i]=str_sub(full_plans[ix,3],7)
#}
#dates[1]='2002'
#save(dates,folders,file='~/Dropbox/StateRedistricting/replication/short/CD_FL_dates.Rdata')
load('plans2010/CD_FL_dates.Rdata')  
#############################

######################
# cd - plots predicted probability
######################
# turning on flips in analysisST.R
flips=T 
#xMat=voteData12[which(voteData12$St==state),] 
  

{
pdf(width=5,height=6.5,file='cd_2010-partisan.pdf')
plot(x=-1000, y=-1000, type='n', xlim=c(-.18, 1), ylim=c(.5,length(states_ix)+.5), axes=F, ylab="", xlab="")#, main=paste("Margins of Victory in House Elections\n",state,sep=""))
axis(1,at=c(0,.25,.5,.75,1),labels=c(0,.25,.5,.75,1))
#cnts=0
cnts=length(states_ix)+1       
nls=fls=0   
#Means and Medians
for (state in states_ix){
	cnts=cnts-1
    chamber='CD'
	#source('~/Dropbox/StateRedistricting/Code/plans2010/analysisST.R')
    # adopted & k_get are the relevant objects  
	# built in .. 
   
	if(state!='AK' & state!='OH' & state!='MT'){     
		st=analysisST(state=state,chamber=chamber,flips=flips,m0=m0,m1=1000000)	
		k_get=st$k_get
		adopted=st$adopted     
	}

	if(state=='VA'){
		adopted=adopted[1]
	}         

	sts=state
	if(state=='FL' & fls==0){
		folds=folders[which(as.numeric(dates)<2015)] 
		inc=c()
		for(i in 1:length(folds)){
			inc=c(inc,grep(rownames(k_get),pattern=folds[i]))  
		}
		k_get=k_get[sort(inc),]		
		fls=fls+1
		adopted=grep(rownames(k_get),pattern='adopted') 		
		#sts=paste(sts,'\n(2011)',sep='')            
		yar=2011
	} else if(state=='FL' & fls==1){
		folds=folders[which(as.numeric(dates)>2014)]
		inc=c()
		for(i in 1:length(folds)){
			inc=c(inc,grep(rownames(k_get),pattern=folds[i]))  
		}
		k_get=k_get[sort(inc),]		
		adopted=grep(rownames(k_get),pattern='adopted')
		#sts=paste(sts,'\n(2015)',sep='')	 
		yar=2015
	} 

	if(state=='NC' & nls==0){
		k_get=k_get[-c(adopted[2]),]
		adopted=grep(rownames(k_get),pattern='adopted')
		yar=2011  
		nls=nls+1       	 
	} else if(state=='NC' & nls==1){  
		k_get=k_get[-c(adopted[1]),]
		adopted=grep(rownames(k_get),pattern='adopted')
		yar=2016		
	}

   	if(state!='AK' & state!='OH' & state!='MT'){  
		# remove those with extra but simular adopted plans
		if(length(grep(rownames(k_get),pattern='adopted'))>1){
			rownames(k_get)[grep(rownames(k_get),pattern='adopted')[-c(1)]]=gsub(rownames(k_get)[grep(rownames(k_get),pattern='adopted')[-c(1)]],pattern='adopted_',replace='')
			adopted=adopted[1]
		}  

		temp.sims = k_get[-c(adopted),]
		temp.10 = k_get[adopted,] 

		#order margins
		#for(i in 1:nrow(temp.sims)){
		#	temp.sims[i,]=sort(temp.sims[i,])
		#}
    	#temp.10=sort(temp.10) 	

		#means.sim=apply((temp.sims-(1-temp.sims)),1,flipPr,state,year_mins,year_maxs)     
		#mean.10=flipPr((temp.10-(1-temp.10)),state,year_mins,year_maxs)
		#ss.dif=mean(mean.10-means.sim)+.001   
		# s=100 *jittered simulations*
		#f=mean((year_maxs[state,]-year_mins[state,])/200) 		
		#means.sim=list()
		#for(s in 1:100){
		#	means.sim[[s]]=apply(jitter(temp.sims,factor=7500),1,repMaj)     			
		#}
		#apply(temp.sims,1,repMaj)
		
		# smooth with a model, but need R seat control 
		means.sim=colMeans(apply(temp.sims,1,repMaj,model='logit',logit=logit[[1]]))
		#means.sim=apply(temp.sims,1,repMaj,model='logit',logit=logit) 
		mean.10=mean(repMaj(temp=temp.10,model='logit',logit=logit[[1]]))
		#mean.10=mean(repMaj(temp=temp.10,model='logit',logit=logit)>.5)
		ss.dif=mean(mean.10-means.sim)+.001

	}
      
	if(state=='AK' | state=='OH' | state=='MT'){
		means.sim=c()
		mean.10=c() 
	    ss.dif=c()
	}    

	if(length(ss.dif)>0){     
		#if(ss.dif>0){
		p.dif=1-sum(mean.10<means.sim)/length(means.sim)    
	} else{                                                     
		p.dif=1
		#sum(mean.10>means.sim)/length(means.sim)    
	} 

	cols='red' 
	ltys=2
	if(p.dif<.05){
		cols='red' 
		ltys=1
	}  

	if(length(mean.10)>0){ 	  
		points(x=means.sim, y=jitter(factor=.2,rep(cnts,length(means.sim))), col='darkblue', lwd=2,pch=20,cex=.52)
		points(y=cnts, x=mean.10, col=cols, pch=1,cex=abs(ss.dif)*8+.58,lty=2) 			
		text(col='black',adj=c(0,NA),x=.015,y=cnts,labels=paste('(',str_sub(ss.dif,1,str_locate(ss.dif,pattern='[/.]')[1]+2),')',sep=''),srt = 1, pos = 2,cex=.5)				
    } else{
		text(col='black',adj=c(0,NA),x=.015,y=cnts,labels='NA',srt = 1, pos = 2,cex=.5)				
	} 
	text(adj=c(0,NA),x=-.11,y=cnts,labels=as.character(state),srt = 1, pos = 2,cex=.65)	

	#text(adj=c(0,NA),x=-.055,y=cnts,labels=as.character(sts),srt = 1, pos = 2,cex=.65)   
	if(state=='FL' | state=='NC'){
		text(adj=c(.5,.5),x=-.106,y=-.28+cnts,labels=paste('(',as.character(yar),')',sep=''),srt = 1, pos = 2,cex=.475)   		
	}  
	#rm(k_get)
}         

# lines and legend graphics   
lines(col='grey',lty=2,y=c(.5,.5),x=c(-.1515,1.))
lines(col='grey',lty=2,y=c(6.5,6.5),x=c(-.1515,1.))  
lines(col='grey',lty=2,y=c(11.5,11.5),x=c(-.1515,1.))   
lines(col='grey',lty=2,y=c(17.5,17.5),x=c(-.1515,1.))

axis(2,line=-1.5,at=c(.5,6.5,11.5,17.5),labels=F,tck=-.02)

text(x=1.-.0105,y=17.05,labels='Republican',cex=.75,adj=c(1,1),srt=0)
text(x=1.-.0105,y=11.05,labels='Court',cex=.75,adj=c(1,1),srt=0) 
text(x=1.-.0105,y=6.05,labels='Independent',cex=.75,adj=c(1,1),srt=0) 
#mtext("Mean Expected Margin of Victory",side=1,at=.25,adj=.5,line=3)  
mtext("Proportion of Republican Seat Share",side=1,at=.5,adj=.5,line=3)   

dev.off()
}  
        
# end counterfactual for CDS